* ---------------------------------------------------------------------------
* Setup and slope-difference estimates.
*
* Loads the pooled analysis file, keeps the baseline sample restricted to the
* decade==1910 ("early") and decade==1940 ("late") observations, and estimates
* how the father-son slope differs between the two groups, once in logs (IGE)
* and once in ranks. The interaction coefficients and their robust standard
* errors are stored, formatted to two decimals, in the locals
*     interaction1940_ige   se_interaction1940_ige
*     interaction1940_rank  se_interaction1940_rank
* which are used later in the figure annotations. The restricted data set is
* saved to the tempfile `fulldata' for the per-period regressions further down.
* ---------------------------------------------------------------------------
clear all
set more off
set maxvar 15000
clear matrix

    use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
    keep if baseline_sample==1

    gen early = decade==1910
    gen late =  decade==1940
    keep if early==1 | late==1

    * Log-log (IGE) specification: interaction of the late indicator with
    * father's log income measures the change in slope between periods.
    gen interaction2 = late * log_father_baseline
    reg log_son_baseline log_father_baseline late interaction2 [aw=wgt_sex_race], robust
    display _b[interaction2]

    * Reference the regressor by its full name. The term in this model is
    * interaction2; a variable called "interaction" is only created for the
    * rank specification below, so _b[interaction] here would depend on
    * variable-name abbreviation (and break with -set varabbrev off-).
    local int1940_ige = _b[interaction2]
    local interaction1940_ige : display %-04.2fc `int1940_ige'

    local se_int1940_ige = _se[interaction2]
    local se_interaction1940_ige : display %-04.2fc `se_int1940_ige'


    * Rank-rank specification: same design using income ranks.
    gen interaction = late * rank_father_baseline
    reg rank_son_baseline rank_father_baseline late interaction [aw=wgt_sex_race], robust

    local int1940_rank = _b[interaction]
    local interaction1940_rank : display %-04.2fc `int1940_rank'
    display `interaction1940_rank'

    local se_int1940_rank = _se[interaction]
    local se_interaction1940_rank : display %-04.2fc `se_int1940_rank'


    * Keep a copy of the micro data; later steps replace the data in memory
    * with the binned scatter points.
    tempfile fulldata
    save `fulldata'

*-------------------*
* BINSCATTERS
*-------------------*

    * Step 1: for each period, bin the weighted micro data (nq(10)) and let
    * binscatter's savedata() write the scatter points to disk. The "rank"
    * series uses the rank_* variables, the "ige" series the log_* variables.
    foreach cohort in early late {
        foreach kind in rank ige {
            local stem = cond("`kind'"=="ige", "log", "rank")
            binscatter `stem'_son_baseline `stem'_father_baseline if `cohort'==1 [aw=wgt_sex_race], nq(10) savedata("binscatter_`kind'_all_`cohort'") replace
        }
    }

    * Step 2: read each saved .csv back in, tag it with its period and
    * measure, and store it as a .dta so the four pieces can be stacked.
    foreach kind in rank ige {
        foreach cohort in early late {
            local stub "binscatter_`kind'_all_`cohort'"
            insheet using "`stub'.csv", clear
            gen period="`cohort'"
            gen measure ="`kind'"
            save "`stub'.dta", replace
        }
    }

    * Step 3: stack the four tagged files into one plotting data set.
    use  "binscatter_rank_all_early", clear
    foreach piece in rank_all_late ige_all_early ige_all_late {
        append using "binscatter_`piece'"
    }
    
    * Fit lines come from the micro data, not from the binned points: for
    * every measure x period cell, regress son on father in `fulldata' and
    * copy the slope and constant onto the matching rows of the scatter data.
    gen slope=.
    gen intercept=.

    * Temp scalars carry the estimates across -restore-.
    tempname fit_slope fit_cons

    foreach kind in rank ige {

        * "ige" is estimated on the log_* variables, "rank" on rank_*.
        local xstem = cond("`kind'"=="ige", "log", "rank")

        foreach cohort in early late {

            preserve
                use `fulldata', clear
                keep if `cohort'==1
                reg `xstem'_son_baseline `xstem'_father_baseline [aw=wgt_sex_race]
                scalar `fit_slope' = _b[`xstem'_father_baseline]
                scalar `fit_cons'  = _b[_cons]
            restore

            replace slope = `fit_slope' if period=="`cohort'" & measure=="`kind'"
            replace intercept = `fit_cons' if period=="`cohort'" & measure=="`kind'"
        }
    }

    * Fitted value at each binned x, using the slope/intercept of its own cell.
    gen yhat_line = intercept + (slope * rank_father_baseline) if measure == "rank"
    replace yhat_line = intercept + (slope * log_father_baseline) if measure == "ige"

*--------------*
* FIGURES
*--------------*

    * IGE panel.
    * Build the two-decimal legend strings slopeearly/interceptearly and
    * slopelate/interceptlate from the stored per-period estimates.
        foreach cohort in early late {
            foreach stat in slope intercept {
                summarize `stat' if period=="`cohort'" & measure=="ige"
                local raw_`stat' =`r(mean)'
                local `stat'`cohort': display %-04.2fc `raw_`stat''
            }
        }

    * Binned points plus dashed fit line for each period; the text() box
    * reports the interaction estimate and its standard error.
    twoway ///
        (scatter log_son_baseline log_father_baseline if period=="early" & measure=="ige", msymbol(oh) mcolor(blue*1.1)) ///
        (line yhat_line log_father_baseline if period=="early" & measure=="ige", lpattern(dash) lcolor(blue*1.1) lwidth(0.25)) ///
        (scatter log_son_baseline log_father_baseline if period=="late" & measure=="ige", msymbol(+) mcolor(orange_red)) ///
        (line yhat_line log_father_baseline if period=="late" & measure=="ige", lpattern(dash) lcolor(orange_red) lwidth(0.25)), ///
        legend(on rows(2) order(1 "1910-1919 Cohorts (Int.=`interceptearly', Slope=`slopeearly')" 3 "1940-1949 Cohorts (Int.=`interceptlate', Slope=`slopelate')") ring(0) position(5)) ///
        ylabel(7.5(0.5)9, axis(1)) yscale(range(7.5 9.25)) xscale(range(6.5 9)) xlabel(6.5(0.5)9) ///
        text(8.9 7 "Slope Difference: `interaction1940_ige'" 8.815 7 "SE: `se_interaction1940_ige'", color(black) size(medsmall)) ///
        xtitle(" " "Predicted parental logged income") ytitle("Respondent logged income" " ")
    graph export "$Mydirectory2/main_figures_tables/figure2_ige.pdf", replace


    * Rank-rank panel.
    * Rebuild the legend strings (slopeearly, interceptearly, slopelate,
    * interceptlate) from the rank-measure rows.
        foreach cohort in early late {
            foreach stat in slope intercept {
                summarize `stat' if period=="`cohort'" & measure=="rank"
                local raw_`stat' =`r(mean)'
                local `stat'`cohort': display %-04.2fc `raw_`stat''
            }
        }

    * Binned points plus dashed fit line for each period; the text() box
    * reports the rank interaction estimate and its standard error.
    twoway ///
        (scatter rank_son_baseline rank_father_baseline if period=="early" & measure=="rank", msymbol(oh) mcolor(blue*1.1)) ///
        (line yhat_line rank_father_baseline if period=="early" & measure=="rank", lpattern(dash) lcolor(blue*1.1) lwidth(0.25)) ///
        (scatter rank_son_baseline rank_father_baseline if period=="late" & measure=="rank", msymbol(+) mcolor(orange_red)) ///
        (line yhat_line rank_father_baseline if period=="late" & measure=="rank", lpattern(dash) lcolor(orange_red) lwidth(0.25)), ///
        legend(on rows(2) order(1 "1910-1919 Cohorts (Int.=`interceptearly', Slope=`slopeearly')" 3 "1940-1949 Cohorts (Int.=`interceptlate', Slope=`slopelate')") ///
        ring(0) position(5)) ylabel(20(10)70, axis(1)) yscale(range(20 70)) ///
        text(60 20 "Slope Difference: `interaction1940_rank'" 57.5 10.84 "SE: `se_interaction1940_rank'", color(black) size(medsmall)) ///
        xtitle(" " "Predicted parental income rank") ytitle("Respondent income rank" " ")
    graph export "$Mydirectory2/main_figures_tables/figure2_rank.pdf", replace


    * Remove the intermediate binscatter files (.csv, .do, .dta) from the
    * working directory. -capture- keeps the script going if a file is
    * already absent.
        foreach kind in ige rank {
            foreach cohort in early late {
                foreach ext in csv do dta {
                    capture rm "binscatter_`kind'_all_`cohort'.`ext'"
                }
            }
        }
        